********************************************************************************************************************
********************************************************************************************************************
********************************************************************************************************************
* Programs "save_synth_output" and "save_synth_runner_output" to save output from synth and synth_runner.
* Output is written to the current directory. Use "cd" before calling these programs.
* save_synth_runner_output will also compute Wilson's asymptotically exact p-value and the Simes p-value.
********************************************************************************************************************
********************************************************************************************************************
********************************************************************************************************************
********************************************************************************************************************
version 17.0

/*******************************************************************************************************
* Install necessary packages if necessary

* github and rcall by Haghish (github is needed to install rcall)
net install github, from("https://haghish.github.io/github/")
github install haghish/rcall, stable
rcall: install.packages("readstata13", repos="http://cran.uk.r-project.org")
rcall_check 
rcall describe
rcall setpath "if rcall cannot detect your R installation, then set the path here to R.exe"

* Wilson's harmonic mean p-value, implemented in R.
rcall: install.packages("harmonicmeanp", repos="http://cran.uk.r-project.org")

* matsave is not used, but it was potentially useful for our purposes, so this is kept as a reminder in case we ever need it
* ssc install matsave, replace all

* estout by Ben Jann, in order to use estadd, to add results to e() matrices.
ssc install estout, replace
********************************************************************************************************/

* Even if harmonicmeanp is installed, it must still be loaded by R.
rcall: library(harmonicmeanp)

******************************************************************************************************************************************************************************************************************************************************
* Program to compute the asymptotically exact p-value (Wilson, Daniel J. "The harmonic mean p-value for combining dependent tests." Proceedings of the National Academy of Sciences 116.4 (2019): 1195-1200.)
* and the Simes p-value (Simes, R. John. "An improved Bonferroni procedure for multiple tests of significance." Biometrika 73.3 (1986): 751-754.).
* 
* This program is called by save_synth_runner_output
* The results are added to the synth_runner e() matrices.
******************************************************************************************************************************************************************************************************************************************************
capture program drop compute_AEP_and_Simes 
program define compute_AEP_and_Simes

	* We take the matrices e(pvals) and e(pvals_std), save them as ordinary matrices, then replace all p-values of
	* zero with their smallest possible non-zero values, which is 1 divided by the number of placebo tests, or 1/e(n_pl_used.)
	* 
	* Explanation why we replace all p-values of zero:
	*
	* The harmonic mean p-value is not defined for p=0. 
	*
	* And the Simes test, while technically computable for p=0, doesn't make sense for p=0. 
	* Suppose we obtained p=0 once and p=0.99 a thousand times. The Simes p-value is the minimum
	* of p * N/i (after sorting least to greatest), so if a single p-value is zero, the final value 
	* is zero as well. But intuitively, if we obtain p=0 and p=0.99 a thousand times, the overall 
	* p-value should be closer to 0.99, not zero.
	*
	* Therefore, to compute the harmonic mean p-value and Simes p-value, we replace all p-values of zero with their
	* smallest possible non-zero values, which is 1 divided by the number of placebo tests, or 1/e(n_pl_used.)
	foreach matrix_name in pvals pvals_std {
		matrix `matrix_name' = e(`matrix_name')
		local numElements = colsof(`matrix_name')
		foreach element of numlist 1(1)`numElements' {
			if `matrix_name'[1,`element'] == 0 {
				matrix `matrix_name'[1,`element'] = 1 /  e(n_pl_used)
			}
		}
	}

	* Pass our matrices (pvals, pvals_std) to R
	rcall: pvals <- st.matrix(pvals)
	rcall: pvals_std <- st.matrix(pvals_std)
		
	* Compute the asymptotically exact p-values (AEP) from the harmonic mean p-values (HMP)
	* p.hmp takes a vector of ordinary p-values and returns an asymptotically exact p-value
	* It does this by calculating the HMP (which is simply the harmonic mean of several ordinary p-values) 
	* and then integrating the Landau distribution over HMP to infinity.
	rcall: pval_AEP <- p.hmp(pvals)
	rcall: pval_std_AEP <- p.hmp(pvals_std)

	* Compute the Simes p-values 
	rcall: L <- length(pvals)
	rcall: w <- rep(1/L,L)
	rcall: pval_simes <- min(sort(pvals/w)/(1:L))
	rcall: pval_std_simes <- min(sort(pvals_std/w)/(1:L))
	
	* For convenience, display e(pval_joint_post) and e(pval_joint_post_std), to which p-values we just generated (AEP and Simes) can be compared
	disp "e(pval_joint_post): " e(pval_joint_post)
	disp "e(pval_joint_post_std): " e(pval_joint_post_std)
	
	* Add results to e matrix of current synth_runner results.
	estadd r(pval_AEP)
	estadd r(pval_std_AEP)
	estadd r(pval_simes)
	estadd r(pval_std_simes) 
	
	* Erase the temporary files created by rcall 
	capture erase ".Rdata"
	capture erase "_load.matrix.pvals_std.dta"
	
end

******************************************************************************************************************************************************************************************************************************************************
* Program to save synth output in the current directory (whatever that is)
******************************************************************************************************************************************************************************************************************************************************
capture program drop save_synth_output
program define save_synth_output
	* Save synth graph
	graph save "synth_graph", replace
	
	* Save Synth results in an Excel spreadsheet
	*Save RMSPE:
	putexcel set "synth_results.xlsx", replace sheet(RMSPE)
	putexcel (A1) = "e(RMSPE): A one by one matrix that contains the Root Mean Squared Prediction Error (RMSPE))."
	putexcel (A3) = matrix(e(RMSPE)), names overwritefmt
	*Save V-Matrix:
	putexcel set "synth_results.xlsx", modify sheet(V-Matrix)
	putexcel (A1) = "e(V_matrix): A diagonal matrix that contains the normalized variable weights in the diagonal."
	putexcel (A3) = matrix(e(V_matrix)), names overwritefmt
	*Save Indicator Balance:
	putexcel set "synth_results.xlsx", modify sheet(Indicator Balance)
	putexcel (A1) = "e(X_balance): A matrix that juxtaposes the predictor values for the unit affected by the intervention and the synthetic control unit. The matrix has two columns (treated and synthetic) and as many rows as predictors."
	putexcel (A3) = matrix(e(X_balance)), names overwritefmt
	*Save Donor Weights:
	putexcel set "synth_results.xlsx", modify sheet(Donor Weights)
	putexcel (A1) = "e(W_weights): A matrix that contains the unit numbers and unit weights, ie. the relative contribution of each control unit to the synthetic control unit.  The matrix has two column and as many rows as control units."
	putexcel (A3) = matrix(e(W_weights)), names overwritefmt
	*Save outcome for both treated and synthetic:
	putexcel set "synth_results.xlsx", modify sheet(Outcomes)
	putexcel (A1) =  "e(Y_treated): A matrix that contains the values of the response variable for the treated unit for each time period.  The matrix has one column and as many rows as time periods specified in resultsperiod()."
	putexcel (A2) = "e(Y_synthetic): A matrix that contains the values of the response variable for the synthetic control unit for each time period.  The matrix has one column and as many rows as time periods specified in resultsperiod()."
	putexcel (A4) = matrix(e(Y_treated)), names overwritefmt
	putexcel (B4) = "Y-Treated"
	putexcel (D4) = matrix(e(Y_synthetic)), names overwritefmt
	putexcel (E4) = "Y-Synthetic"
end

******************************************************************************************************************************************************************************************************************************************************
* Program to save synth_runner output in the current directory (whatever that is)
******************************************************************************************************************************************************************************************************************************************************
capture program drop save_synth_runner_output
program define save_synth_runner_output

	* Obtain the asymptotically exact p-value and the Simes p-value 
	compute_AEP_and_Simes
	
	* Generate and save synth_runner graphs
	* This is captured noisily because it cannot be run when synth_runner has multiple treated units.
	capture noisily {
		single_treatment_graphs, trlinediff(0)
			graph save raw "synth_runner_raw", replace
			graph save effects "synth_runner_effects", replace
		effect_graphs, trlinediff(0)
			graph save effect "synth_runner_effect", replace
			graph save tc "synth_runner_tc", replace
		pval_graphs
			graph save pvals "synth_runner_pvals", replace
			graph save pvals_std "synth_runner_pvals_std", replace
	}

	* Save synth_runner results
	* Save number of placebos
	putexcel set "synth_runner_results.xlsx", replace sheet(N-Placebos)
	putexcel (A1) = "e(n_pl): The number of placebo averages used for comparison. For single treatment setups, this can be used to calculate purely randomized p-values."
		putexcel (A2) = matrix(e(n_pl)), names overwritefmt
	putexcel (A5) = "e(n_pl_used): undocumented."
		putexcel (A6) = matrix(e(n_pl_used)), names overwritefmt
	putexcel (A9) = "e(failed_opt_targets): Errors when constructing the synthetic controls for non-treated units are handled gracefully. If any are detected they will be listed in this matrix. (Errors when constructing the synthetic control for treated units will abort the method.)"
		putexcel (A10) = matrix(e(failed_opt_targets)), names overwritefmt
	* Save P-Values
	putexcel set "synth_runner_results.xlsx", modify sheet(P-Values)
	putexcel (A1) = "e(pvals): A vector of the proportions of placebo effects that are at least as large as the main effect for each post-treatment period."
		putexcel (A2) = matrix(e(pvals)), names overwritefmt
	putexcel (A5) = "e(pvals_std): A vector of the proportions of placebo standardized effects that are at least as large as the main standardized effect for each post-treatment period."
		putexcel (A6) = matrix(e(pvals_std)), names overwritefmt
	putexcel (A9) = "e(pval_joint_post): The proportion of placebos that have a post-treatment RMSPE at least as large as the average for the treated units."
		putexcel (A10) = matrix(e(pval_joint_post)), names overwritefmt
	putexcel (A13) = "e(pval_joint_post_std): The proportion of placebos that have a ratio of post-treatment RMSPE over pre-treatment RMSPE at least as large as the average ratio for the treated units."
		putexcel (A14) = matrix(e(pval_joint_post_std)), names overwritefmt	
	putexcel (A17) = "e(pval_AEP): The asymptotically exact p-value (AEP) of e(pvals). The AEP is obtained by taking the harmonic mean of several ordinary p-values (HMP), and then integrating the Landua distribution over HMP to infinity."
		putexcel (A18) = "The AEP is a form of meta-analysis. See Wilson, Daniel J. ''The harmonic mean p-value for combining dependent tests.'' Proceedings of the National Academy of Sciences 116.4 (2019): 1195-1200."
		putexcel (A19) = "This value should be interpreted similarly to e(pval_joint_post)."
		putexcel (A20) = matrix(e(pval_AEP)), names overwritefmt	
	putexcel (A23) = "e(pval_std_AEP): Equivalent to e(pvals_AEP), but computed from e(pvals_std)."
		putexcel (A24) = "This value should be interpreted similarly to e(pval_joint_post_std)."
		putexcel (A25) = matrix(e(pval_std_AEP)), names overwritefmt
	putexcel (A28) = "e(pval_simes): The Simes p-value of e(pvals). The Simes p-value is similar to the Bonferroni correction for multiple tests, except instead of multiplying every p-value by N,"
		putexcel (A29) = "it orders p-values from smallest to largest, and multiplies each by N/i. Thus, it is less conservative, and has a lower false-negative rate than the Bonferroni correction."
		putexcel (A30) = "See Simes, R. John. ''An improved Bonferroni procedure for multiple tests of significance.'' Biometrika 73.3 (1986): 751-754."
		putexcel (A31) = "This value should be interpreted similarly to e(pval_joint_post)."
		putexcel (A32) = matrix(e(pval_simes)), names overwritefmt
	putexcel (A35) = "e(pval_std_simes): The Simes p-value of e(pvals_std)."
		putexcel (A36) = "This value should be interpreted similarly to e(pval_joint_post_std)."
		putexcel (A37) = matrix(e(pval_std_simes)), names overwritefmt	
	
	* Save measures of fit
	putexcel set "synth_runner_results.xlsx", modify sheet(Fit)
	putexcel (A1) = "e(avg_pre_rmspe_p): The proportion of placebos that have a pre-treatment RMSPE at least as large as the average of the treated units. A measure of fit. Concerning if significant."
	putexcel (A2) = matrix(e(avg_pre_rmspe_p)), names overwritefmt
	
	putexcel set "synth_runner_results.xlsx", modify sheet(Fit)
	putexcel (A5) = "e(avg_val_rmspe_p):  When specifying training_propr, this is the proportion of placebos that have a RMSPE for the validation period at least as large as the average of the treated units. A measure of fit. Concerning if significant."
	putexcel (A6) = matrix(e(avg_val_rmspe_p)), names overwritefmt
end 

************************************************************************************************************************************************************************************************************************************************
************************************************************************************************************************************************************************************************************************************************